#!/usr/bin/env Rscript

args = commandArgs(trailingOnly=TRUE)
if (length(args)==0) {
  stop("Need to provide the path", call.=FALSE)
} else if (length(args)==1) {
  setwd(args[1])
}

library(tidyverse)
library(lubridate)
library(readxl)
library(foreign)
library(ggplot2)


##This is the r version of the Exiobase code, because it's waaaay easier to do it this way.

##The outputs are Kg per 2007 Million Euros
args = commandArgs(trailingOnly = "TRUE")


##For weights, you can either use final deamnd or total demand
  #total demand = final demand + indirect demand
##If you want final demand instead, switch total_demand to FALSE
set_total_demand_weights <- FALSE



##################Open the Leiontif Inverse and make long##################

l_inv <- read_delim("0_build/input/exiobase/mrIOT_IxI_fpa_coefficient_version2.2.2/L_inverse.txt",
                    delim=',', col_names = FALSE) %>%
  mutate(cXi_origin = row_number()) %>%
  gather(key = cXi_dest, value = total_requirements, -cXi_origin) %>%
  mutate(cXi_dest =as.numeric(str_replace(cXi_dest, "X", "")))


##################Open the emissions file, and calculate total emissions##################
emissions_open <- read_tsv("0_build/input/exiobase/mrIOT_IxI_fpa_coefficient_version2.2.2/mrEmissions_version2.2.2.txt",
                           col_names = T)

#first problem is emissions needs to be trasposed carefully, then keep columns that are relevant
emissions <- emissions_open %>%
  rename_all(
    funs(paste(str_sub(colnames(emissions_open),1,2),
               emissions_open %>%
                 filter(is.na(X1)) %>%
                 c()
               ,1:length(names(emissions_open)), sep = '_'))
  ) %>%
  filter((X1_NA_1=="CO2 - combustion"  & X2_NA_2=='air') |
           (X1_NA_1=="CO2 - non combustion"  & X2_NA_2=='air')) %>%
  select(-X2_NA_2, -X3_NA_3) %>%
  gather(var, value, -X1_NA_1) %>%
  spread(X1_NA_1, value) %>%
  separate(var, c('c_origin','i_origin', 'cXi_origin'), sep='_') %>%
  rename_at(vars(c("CO2 - combustion", "CO2 - non combustion")), ~ 
              c('co2_rate_direct1', 'co2_rate_direct2')) %>%
  mutate(cXi_origin = as.numeric(cXi_origin)-3,
         co2_rate_direct1 = as.numeric(co2_rate_direct1),
         co2_rate_direct2 = as.numeric(co2_rate_direct2) + co2_rate_direct1) %>%
  arrange(cXi_origin)

dest <- emissions %>%
  select(c_origin, i_origin, cXi_origin) %>%
  rename_at(vars(c('c_origin', 'i_origin', "cXi_origin")), ~ 
              c('c_dest', 'i_dest', "cXi_dest"))

    #JUST WONDERING WHAT IS THE HEAVIEST EMISSIONS LEVEL
    emmissions_us <- emissions %>%
      filter(c_origin=="US")
    #So I'm not going to use that for anything, but it is proof that exiobase really thinks n-fertilizer
    #is super emmissions intesne. I'm not sure that is right, but it is what exiobase reports. hmmm.
    
country_dest <- dest %>%
  group_by(c_dest) %>%
  summarise(cXi_dest=min(cXi_dest)) %>%
  arrange(cXi_dest) %>%
  mutate(c_dest_id=row_number()) %>%
  select(-cXi_dest)

emissions_total <- left_join(emissions, l_inv, by="cXi_origin") %>%
  left_join(dest, by='cXi_dest') %>%
  mutate(co2_rate_total1= co2_rate_direct1 * total_requirements,
         co2_rate_total2= co2_rate_direct2 * total_requirements) %>%
  filter(c_origin!='US' & c_dest!='US') %>%
  group_by(c_dest, i_dest, cXi_dest) %>%
  summarise_at(vars(co2_rate_total1, co2_rate_total2), funs(sum)) %>%
  rename_at(vars(c('c_dest', 'i_dest', "cXi_dest")), ~ 
              c('c_origin', 'i_origin', "cXi_origin")) %>%
  left_join(emissions, by=c('c_origin', 'i_origin', "cXi_origin"))

##################Open the final demand file and then calc indirect demand##################

demand <- read_tsv("0_build/input/exiobase/mrIOT_IxI_fpa_coefficient_version2.2.2/mrFinalDemand_version2.2.2.txt") %>%
  rename_at(vars(c('X1', 'X2')), ~ 
              c('c_origin', 'i_origin')) %>%
  select(-'X3') %>%
  gather(key = c_dest, value = demand, -c('c_origin', 'i_origin')) %>%
  separate(c_dest, c('c_dest','type'), sep='_') %>%
  filter(is.na(type) | type==1 | type==2,
         !is.na(c_origin)) %>%
  #need to bring back ids to make sure the matrix will be correct
  left_join(emissions %>% select(-co2_rate_direct1, -co2_rate_direct2), by = c("c_origin", "i_origin")) %>%
  group_by(c_origin, i_origin, c_dest, cXi_origin) %>%
  summarise(demand=sum(as.numeric(demand))) %>%
  #bring id for columns for the matrix
  left_join(country_dest, by = ('c_dest')) %>%
  arrange(cXi_origin, c_dest_id) %>%
  ungroup()

###########Here we bring in demand, create indepentdent demand, and then make emissions##########
  demand_matrix <- demand %>%
    select(-c_dest) %>%
    spread(c_dest_id,demand) %>%
    arrange(cXi_origin) %>%
    select(-cXi_origin, -c_origin, -i_origin) %>%
    data.matrix()
  
  #make the leontif inverse a matrix
  l_inv_matrix <- read_delim("0_build/input/exiobase/mrIOT_IxI_fpa_coefficient_version2.2.2/L_inverse.txt",
                             delim=',', col_names = FALSE) %>%
    data.matrix()
  
  #do matrix multiplication in order to create an indirect demand tibble
  ind_demand <-  l_inv_matrix %*% demand_matrix %>%
    as_tibble() %>%
    bind_cols(emissions %>% select(-co2_rate_direct1, -co2_rate_direct2)) %>%
    gather(key = c_dest_id, value = ind_demand, -c('c_origin', 'i_origin', 'cXi_origin')) %>%
    mutate(c_dest_id=as.integer(c_dest_id))

#create a list of the countires who already have cap and trade systems
  eu_ca <- c('AT','BE','BG','CY','CZ','DE','DK','EE','ES','FI',
             'FR','GR','HR','HU','IE','IT','LT','LU','LV','MT',
             'NL','PL','PT','RO','SE','SL','SK','GB','CA')
  
########Create a function#######
emissions_calc_func <- function(import=TRUE, total_demand_weights=FALSE){
  #attach demand to ind demand, add them together, keep imports to us,
  #then genrate weights (perc_import), then multiply by weights, and sum all together
  us_emissions <- demand %>%
    left_join(ind_demand, by=c('c_origin','i_origin','cXi_origin','c_dest_id')) %>%
    mutate(total_demand=ind_demand+demand) %>%
    filter(if (import==TRUE) c_dest=='US' & c_origin!='US' else
      c_dest!='US' & c_origin=='US') %>%
    group_by(i_origin) %>%
    mutate(i_total_demand=sum(total_demand),
           i_final_demand=sum(demand),
           total_weight = total_demand_weights,
           weight = if_else(total_weight==TRUE,
                            total_demand/i_total_demand,
                            demand/i_final_demand,
                            0),
           weight = if_else(is.na(weight),0,weight),
           import_var = import,
           c_origin = if_else(import_var==TRUE,c_origin,c_dest),
           #add in eu and canada-less weights
           total_demand_noeu = if_else(c_origin %in% eu_ca, 0, total_demand),
           demand_noeu = if_else(c_origin %in% eu_ca, 0, demand),
           i_total_demand_noeu=sum(total_demand_noeu),
           i_final_demand_noeu=sum(demand_noeu),
           weight_noeu = if_else(total_weight==TRUE,
                                 total_demand_noeu/i_total_demand_noeu,
                                 demand_noeu/i_final_demand_noeu,
                                 0),
           weight_noeu = if_else(is.na(weight_noeu),0,weight_noeu)) %>%
    select(c_origin, i_origin, weight, weight_noeu) %>%
    left_join(emissions_total, by=c('c_origin', 'i_origin')) %>%
    mutate_at(vars('co2_rate_total1','co2_rate_total2','co2_rate_direct1','co2_rate_direct2'),
              .funs=funs(yeseu=.*weight,noeu=.*weight_noeu)) %>%
    select(-weight, -weight_noeu,-cXi_origin, -c_origin,
           -co2_rate_total1,-co2_rate_total2,-co2_rate_direct1,-co2_rate_direct2) %>%
    group_by(i_origin) %>%
    summarise_all(sum)
  
  ##################Now, I need to bring in the crosswalk and switch to NAICS6##################
  #weight by demand
  demand_weight <- demand %>%
    left_join(ind_demand, by=c('c_origin','i_origin','cXi_origin','c_dest_id')) %>%
    mutate(total_weight = total_demand_weights,
           demand = if_else(total_weight==TRUE,
                            ind_demand,
                            demand),
           demand_noeu = if_else(c_origin %in% eu_ca | c_dest %in% eu_ca , 0, demand)) %>%
    filter(if (import==TRUE) c_dest=='US' & c_origin!='US' else
      c_dest!='US' & c_origin=='US') %>%
    group_by(i_origin) %>%
    summarise(demand = sum(demand), demand_noeu = sum(demand_noeu))
  
  crosswalk <- read_csv('0_build/input/crosswalks/naics_exiobase_crosswalk.csv') %>%
    rename(i_origin = exiobase_name)
  
  us_naics <- us_emissions %>%
    right_join(crosswalk, by = "i_origin") %>%
    #for the meat cats that have two exiobase cats
    left_join(demand_weight, by = "i_origin") %>%
    group_by(naics) %>%
    mutate(demand_sum=sum(demand), demand_noeu_sum=sum(demand_noeu)) %>%
    ungroup() %>%
    mutate_at(vars('co2_rate_total1_yeseu','co2_rate_total2_yeseu','co2_rate_direct1_yeseu','co2_rate_direct2_yeseu'),
              funs(.*(demand/demand_sum))) %>%
    mutate_at(vars('co2_rate_total1_noeu','co2_rate_total2_noeu','co2_rate_direct1_noeu','co2_rate_direct2_noeu'),
              funs(.*(demand_noeu/demand_noeu_sum))) %>%
    select(co2_rate_total1_yeseu,co2_rate_total2_yeseu,co2_rate_direct1_yeseu,co2_rate_direct2_yeseu,
           co2_rate_total1_noeu,co2_rate_total2_noeu,co2_rate_direct1_noeu,co2_rate_direct2_noeu,
           naics, naics_title, demand, demand_noeu) %>%
    group_by(naics, naics_title) %>%
    summarise_all(sum)
  
  if(import==TRUE){
    us_naics_rename <- us_naics %>%
      rename_at(vars(c('co2_rate_total1_yeseu','co2_rate_total2_yeseu',
                       'co2_rate_direct1_yeseu','co2_rate_direct2_yeseu',
                       'co2_rate_total1_noeu','co2_rate_total2_noeu',
                       'co2_rate_direct1_noeu','co2_rate_direct2_noeu','demand','demand_noeu')), ~ 
                  c('imp_co2_rate_total_combust_yeseu','imp_co2_rate_total_all_yeseu',
                    'imp_co2_rate_direct_combust_yeseu','imp_co2_rate_direct_all_yeseu',
                    'imp_co2_rate_total_combust_noeu','imp_co2_rate_total_all_noeu',
                    'imp_co2_rate_direct_combust_noeu','imp_co2_rate_direct_all_noeu','import_demand','import_demand_noeu'))
    print('import')
  } else {
    us_naics_rename <- us_naics %>%
      rename_at(vars(c('co2_rate_total1_yeseu','co2_rate_total2_yeseu',
                       'co2_rate_direct1_yeseu','co2_rate_direct2_yeseu',
                       'co2_rate_total1_noeu','co2_rate_total2_noeu',
                       'co2_rate_direct1_noeu','co2_rate_direct2_noeu','demand','demand_noeu')), ~ 
                  c('exp_co2_rate_total_combust_yeseu','exp_co2_rate_total_all_yeseu',
                    'exp_co2_rate_direct_combust_yeseu','exp_co2_rate_direct_all_yeseu',
                    'exp_co2_rate_total_combust_noeu','exp_co2_rate_total_all_noeu',
                    'exp_co2_rate_direct_combust_noeu','exp_co2_rate_direct_all_noeu','export_demand','export_demand_noeu'))
    print('export')
  }
  return(us_naics_rename)
}

  
##############Create import and export emissions################  
us_imp_naics <- emissions_calc_func(import=TRUE, total_demand_weights=set_total_demand_weights)
  
us_exp_naics <- emissions_calc_func(import=FALSE, total_demand_weights=set_total_demand_weights)
  
exiobase_output <- us_imp_naics %>%
  left_join(us_exp_naics, by = c('naics', 'naics_title')) %>%
  mutate(impexp_co2_rate_total_combust_yeseu= (exp_co2_rate_total_combust_yeseu*export_demand+
                                           imp_co2_rate_total_combust_yeseu*import_demand)/(export_demand+import_demand),
         impexp_co2_rate_total_all_yeseu= (exp_co2_rate_total_all_yeseu*export_demand+
                                       imp_co2_rate_total_all_yeseu*import_demand)/(export_demand+import_demand),
         impexp_co2_rate_direct_combust_yeseu= (exp_co2_rate_direct_combust_yeseu*export_demand+
                                            imp_co2_rate_direct_combust_yeseu*import_demand)/(export_demand+import_demand),
         impexp_co2_rate_direct_all_yeseu= (exp_co2_rate_direct_all_yeseu*export_demand+
                                        imp_co2_rate_direct_all_yeseu*import_demand)/(export_demand+import_demand),
         impexp_co2_rate_total_combust_noeu= (exp_co2_rate_total_combust_noeu*export_demand_noeu+
                                           imp_co2_rate_total_combust_noeu*import_demand_noeu)/(export_demand_noeu+import_demand_noeu),
         impexp_co2_rate_total_all_noeu= (exp_co2_rate_total_all_noeu*export_demand_noeu+
                                       imp_co2_rate_total_all_noeu*import_demand_noeu)/(export_demand_noeu+import_demand_noeu),
         impexp_co2_rate_direct_combust_noeu= (exp_co2_rate_direct_combust_noeu*export_demand_noeu+
                                            imp_co2_rate_direct_combust_noeu*import_demand_noeu)/(export_demand_noeu+import_demand_noeu),
         impexp_co2_rate_direct_all_noeu= (exp_co2_rate_direct_all_noeu*export_demand_noeu+
                                        imp_co2_rate_direct_all_noeu*import_demand_noeu)/(export_demand_noeu+import_demand_noeu)
  ) %>%
  select(-export_demand, -import_demand, -export_demand_noeu, -import_demand_noeu) %>%
  #make Joe's conversion (from Joe):
  #raw data on CO2 rates are in kg / million euros (see exiobase raw data)
  #1000 converts kg->tons; 1000000 converts million euros->euros; 
  #1.063 is euros/dollar; 1.1445 converts 2007 -> 2016 GDP deflator 
  #(=110.1/96.2 from https://data.worldbank.org/indicator/NY.GDP.DEFL.ZS?end=2016&start=2007)
  #mutate_at(vars(contains('co2')),
  #          funs(.*1.063 / (1000*1000000*1.1445)))
  # NEW PLAN: KG PER MILLION 2007 DOLLARS
  mutate_at(vars(contains('co2')),
            funs(.*1.063))


write.dta(exiobase_output, "0_build/output/exiobase/exiobase_emissions.dta")
